Loading libraries and get data set

library(tidyverse)
Warning: package ‘tidyverse’ was built under R version 4.1.3
Registered S3 methods overwritten by 'dbplyr':
  method         from
  print.tbl_lazy     
  print.tbl_sql      
-- Attaching packages ----------------------------------------------------------------------------------------- tidyverse 1.3.1 --
v ggplot2 3.3.6     v purrr   0.3.4
v tibble  3.1.5     v dplyr   1.0.9
v tidyr   1.1.4     v stringr 1.4.0
v readr   2.0.2     v forcats 0.5.1
Warning: package ‘ggplot2’ was built under R version 4.1.3
Warning: package ‘dplyr’ was built under R version 4.1.3
-- Conflicts -------------------------------------------------------------------------------------------- tidyverse_conflicts() --
x dplyr::filter() masks stats::filter()
x dplyr::lag()    masks stats::lag()
cali_birth <- read_csv("C:\\Users\\maldo\\OneDrive\\Desktop\\FloridaPoly\\Data vizualization\\dataviz_mini-project_02\\dataviz_mini-project_02\\data\\california_birth.csv", col_types = cols())

Checking data set

str(cali_birth)
spec_tbl_df [1,764 x 4] (S3: spec_tbl_df/tbl_df/tbl/data.frame)
 $ year   : num [1:1764] 2008 2008 2008 2008 2008 ...
 $ patcnty: chr [1:1764] "Alameda" "Alameda" "Alameda" "Alameda" ...
 $ agegrp : chr [1:1764] "Total Births" "Older Mothers (35 years old or older)" "Teen Mothers (15 years old to 19 years old)" "Typical Aged Mothers (20 years old to 34 years old)" ...
 $ count  : num [1:1764] 20470 4714 1319 14422 2464 ...
 - attr(*, "spec")=
  .. cols(
  ..   year = col_double(),
  ..   patcnty = col_character(),
  ..   agegrp = col_character(),
  ..   count = col_double()
  .. )
 - attr(*, "problems")=<externalptr> 

Interactive plot

For each county, we can see that this data set is divided into “agegrp” groups, one of which is “Total Birth”. We are going to get rid of this because the math does not make sense if we check it, and in addition, it is not required for this analysis.

births_year <- cali_birth %>% group_by(year, agegrp) %>%
    filter(agegrp != "Total Births")

births_year

Now that we have cleared it up, let’s check the percentage of each group.

births_yearngroup <- births_year %>% 
               group_by(year, agegrp) %>%
               summarize(total = sum(count))%>%
               mutate(freq = total / sum(total),
               pct = round((freq*100), 2))  
`summarise()` has grouped output by 'year'. You can override using the `.groups` argument.
births_yearngroup
library(plotly)

Attaching package: ‘plotly’

The following object is masked from ‘package:ggplot2’:

    last_plot

The following object is masked from ‘package:stats’:

    filter

The following object is masked from ‘package:graphics’:

    layout
P2.1 <- ggplot(births_yearngroup, aes(x = year, y = pct, fill = agegrp)) +
     geom_col(position = "dodge2") +
     labs(title = "Birth in California by a group of women",x = "Years",y = "Percent", fill = "Groups")+
     coord_cartesian(xlim =c(2008, 2016)) +
     scale_fill_brewer(type = "qual", palette = "Dark2")

my_plot <- ggplotly(P2.1)
my_plot

Comment

It is important to note that the percentage of typical age mothers has not changed significantly, remaining at 72.94 to 73.86.

This gives us a good perspective since they are the majority in this data set.

library(htmlwidgets)
saveWidget(my_plot, "my_plot.html")

Spatial visualization

Loading libraries and get data set

library(sf)
Linking to GEOS 3.9.1, GDAL 3.2.1, PROJ 7.2.1; sf_use_s2() is TRUE
# Load 
cali_counties <- read_sf("C:\\Users\\maldo\\OneDrive\\Desktop\\FloridaPoly\\Data vizualization\\dataviz_mini-project_02\\dataviz_mini-project_02\\data\\ca-county-boundaries\\CA_Counties\\CA_Counties_TIGER2016.shp")
cali_counties
Simple feature collection with 58 features and 17 fields
Geometry type: MULTIPOLYGON
Dimension:     XY
Bounding box:  xmin: -13857270 ymin: 3832931 xmax: -12705030 ymax: 5162404
Projected CRS: WGS 84 / Pseudo-Mercator

To see which counties contribute the most births, we’ll need to establish a new dataframe.

births_yearTotal <- cali_birth %>% 
                    group_by(year, agegrp) %>%
                    filter(agegrp == "Total Births")

births_yearTotal
colnames(births_yearTotal)[2] <- "NAME"

To make these two date sets plot, let’s mix them.

birth_map <- cali_counties %>%
  left_join(births_yearTotal, by = "NAME")

P2.2 <- ggplot(birth_map) +
         geom_sf(aes(fill = count), 
         alpha=0.9, col="white") +
         scale_fill_viridis_c(name = "Births", trans = "log2", option = "plasma") +
         labs(title = "Birth in California") 
P2.2

library("svglite")
Warning: package ‘svglite’ was built under R version 4.1.3
ggsave("Birth in California.jpg", P2.2)
Saving 7 x 7 in image

Comment

There is a county in yellow, which is LA, that has the highest number of births. There are also more in orange that could be big cities like San Francisco.

visualization of a model

library(broom)
births_yearTotal%>% 
 top_n(count, n = 5)
births_yearLA <- cali_birth %>% group_by(year, agegrp) %>%
    filter(agegrp == "Older Mothers (35 years old or older)" & patcnty == "Los Angeles")

births_yearLA
birth_model <- lm(count ~ year, data = births_yearLA)
P2.3 <- ggplot(births_year2, aes(x = year, y = count)) +
         geom_point() +
         geom_smooth(method = "lm", 
              formula = "y ~ x") + 
         theme_minimal()
P2.3

Comment As we’ve seen, LA has one of the highest birth rates in California. We’ll most likely need to compare to a similar-sized city. For the time being, we’re seeing a linear model around the year in question.

birth_model2<- tidy(birth_model, conf.int = TRUE)%>%
  filter(term != "(Intercept)")
glance(birth_model)
P2.4 <- ggplot(birth_model2,
            aes(x = estimate, 
                y = fct_rev(term))) +
            geom_pointrange(aes(xmin = conf.low, xmax = conf.high)) +
            geom_vline(xintercept = 0,  color = "purple") + 
  theme_minimal()

P2.4

LS0tDQp0aXRsZTogIkRhdGEgVmlzdWFsaXphdGlvbiAtIE1pbmktUHJvamVjdCAyIg0KYXV0aG9yOiAiRWR3aW4gTWFsZG9uYWRvIC0gYGVtYWxkb25hZG8xMTI3QGZsb3JpZGFwb2x5LmNvbWAiDQpvdXRwdXQ6IGh0bWxfbm90ZWJvb2sNCi0tLQ0KDQoNCiMjIyMgTG9hZGluZyBsaWJyYXJpZXMgYW5kIGdldCBkYXRhIHNldA0KDQpgYGB7cn0NCmxpYnJhcnkodGlkeXZlcnNlKQ0KDQpjYWxpX2JpcnRoIDwtIHJlYWRfY3N2KCJDOlxcVXNlcnNcXG1hbGRvXFxPbmVEcml2ZVxcRGVza3RvcFxcRmxvcmlkYVBvbHlcXERhdGEgdml6dWFsaXphdGlvblxcZGF0YXZpel9taW5pLXByb2plY3RfMDJcXGRhdGF2aXpfbWluaS1wcm9qZWN0XzAyXFxkYXRhXFxjYWxpZm9ybmlhX2JpcnRoLmNzdiIsIGNvbF90eXBlcyA9IGNvbHMoKSkNCmBgYA0KDQojIyMjIENoZWNraW5nIGRhdGEgc2V0DQoNCmBgYHtyfQ0Kc3RyKGNhbGlfYmlydGgpDQpgYGANCiMjIEludGVyYWN0aXZlIHBsb3QNCg0KRm9yIGVhY2ggY291bnR5LCB3ZSBjYW4gc2VlIHRoYXQgdGhpcyBkYXRhIHNldCBpcyBkaXZpZGVkIGludG8gImFnZWdycCIgZ3JvdXBzLCBvbmUgb2Ygd2hpY2ggaXMgIlRvdGFsIEJpcnRoIi4gV2UgYXJlIGdvaW5nIHRvIGdldCByaWQgb2YgdGhpcyBiZWNhdXNlIHRoZSBtYXRoIGRvZXMgbm90IG1ha2Ugc2Vuc2UgaWYgd2UgY2hlY2sgaXQsIGFuZCBpbiBhZGRpdGlvbizCoGl0IGlzIG5vdCByZXF1aXJlZCBmb3IgdGhpcyBhbmFseXNpcy4NCg0KYGBge3J9DQpiaXJ0aHNfeWVhciA8LSBjYWxpX2JpcnRoICU+JSBncm91cF9ieSh5ZWFyLCBhZ2VncnApICU+JQ0KICAgIGZpbHRlcihhZ2VncnAgIT0gIlRvdGFsIEJpcnRocyIpDQoNCmJpcnRoc195ZWFyDQpgYGANCg0KTm93IHRoYXQgd2UgaGF2ZSBjbGVhcmVkIGl0IHVwLCBsZXQncyBjaGVjayB0aGUgcGVyY2VudGFnZSBvZiBlYWNoIGdyb3VwLg0KDQpgYGB7cn0NCmJpcnRoc195ZWFybmdyb3VwIDwtIGJpcnRoc195ZWFyICU+JSANCiAgICAgICAgICAgICAgIGdyb3VwX2J5KHllYXIsIGFnZWdycCkgJT4lDQogICAgICAgICAgICAgICBzdW1tYXJpemUodG90YWwgPSBzdW0oY291bnQpKSU+JQ0KICAgICAgICAgICAgICAgbXV0YXRlKGZyZXEgPSB0b3RhbCAvIHN1bSh0b3RhbCksDQogICAgICAgICAgICAgICBwY3QgPSByb3VuZCgoZnJlcSoxMDApLCAyKSkgIA0KDQpiaXJ0aHNfeWVhcm5ncm91cA0KYGBgDQoNCg0KYGBge3J9DQpsaWJyYXJ5KHBsb3RseSkNCg0KUDIuMSA8LSBnZ3Bsb3QoYmlydGhzX3llYXJuZ3JvdXAsIGFlcyh4ID0geWVhciwgeSA9IHBjdCwgZmlsbCA9IGFnZWdycCkpICsNCiAgICAgZ2VvbV9jb2wocG9zaXRpb24gPSAiZG9kZ2UyIikgKw0KICAgICBsYWJzKHRpdGxlID0gIkJpcnRoIGluIENhbGlmb3JuaWEgYnkgYSBncm91cCBvZiB3b21lbiIseCA9ICJZZWFycyIseSA9ICJQZXJjZW50IiwgZmlsbCA9ICJHcm91cHMiKSsNCiAgICAgY29vcmRfY2FydGVzaWFuKHhsaW0gPWMoMjAwOCwgMjAxNikpICsNCiAgICAgc2NhbGVfZmlsbF9icmV3ZXIodHlwZSA9ICJxdWFsIiwgcGFsZXR0ZSA9ICJEYXJrMiIpDQoNCm15X3Bsb3QgPC0gZ2dwbG90bHkoUDIuMSkNCm15X3Bsb3QNCmBgYA0KKipDb21tZW50KioNCg0KSXQgaXMgaW1wb3J0YW50IHRvIG5vdGUgdGhhdCB0aGUgcGVyY2VudGFnZSBvZiB0eXBpY2FsIGFnZSBtb3RoZXJzIGhhcyBub3QgY2hhbmdlZCBzaWduaWZpY2FudGx5LCByZW1haW5pbmcgYXQgNzIuOTQgdG8gNzMuODYuDQoNClRoaXMgZ2l2ZXMgdXMgYSBnb29kIHBlcnNwZWN0aXZlIHNpbmNlIHRoZXkgYXJlIHRoZSBtYWpvcml0eSBpbiB0aGlzIGRhdGEgc2V0Lg0KDQpgYGB7cn0NCmxpYnJhcnkoaHRtbHdpZGdldHMpDQpzYXZlV2lkZ2V0KG15X3Bsb3QsICJteV9wbG90Lmh0bWwiKQ0KYGBgDQoNCiMjIFNwYXRpYWwgdmlzdWFsaXphdGlvbg0KDQojIyMjIExvYWRpbmcgbGlicmFyaWVzIGFuZCBnZXQgZGF0YSBzZXQNCg0KYGBge3IgbG9hZC1saWJyYXJpZXMtZGF0YSwgd2FybmluZz1GQUxTRSwgbWVzc2FnZT1GQUxTRX0NCmxpYnJhcnkoc2YpDQojIExvYWQgDQpjYWxpX2NvdW50aWVzIDwtIHJlYWRfc2YoIkM6XFxVc2Vyc1xcbWFsZG9cXE9uZURyaXZlXFxEZXNrdG9wXFxGbG9yaWRhUG9seVxcRGF0YSB2aXp1YWxpemF0aW9uXFxkYXRhdml6X21pbmktcHJvamVjdF8wMlxcZGF0YXZpel9taW5pLXByb2plY3RfMDJcXGRhdGFcXGNhLWNvdW50eS1ib3VuZGFyaWVzXFxDQV9Db3VudGllc1xcQ0FfQ291bnRpZXNfVElHRVIyMDE2LnNocCIpDQpgYGANCg0KYGBge3J9DQpjYWxpX2NvdW50aWVzDQpgYGANCg0KVG8gc2VlIHdoaWNoIGNvdW50aWVzIGNvbnRyaWJ1dGUgdGhlIG1vc3QgYmlydGhzLCB3ZSdsbCBuZWVkIHRvIGVzdGFibGlzaCBhIG5ldyBkYXRhZnJhbWUuDQoNCmBgYHtyfQ0KYmlydGhzX3llYXJUb3RhbCA8LSBjYWxpX2JpcnRoICU+JSANCiAgICAgICAgICAgICAgICAgICAgZ3JvdXBfYnkoeWVhciwgYWdlZ3JwKSAlPiUNCiAgICAgICAgICAgICAgICAgICAgZmlsdGVyKGFnZWdycCA9PSAiVG90YWwgQmlydGhzIikNCg0KYmlydGhzX3llYXJUb3RhbA0KYGBgDQoNCmBgYHtyfQ0KY29sbmFtZXMoYmlydGhzX3llYXJUb3RhbClbMl0gPC0gIk5BTUUiDQpgYGANCg0KVG8gbWFrZSB0aGVzZSB0d28gZGF0ZSBzZXRzIHBsb3QsIGxldCdzIG1peCB0aGVtLg0KDQpgYGB7cn0NCmJpcnRoX21hcCA8LSBjYWxpX2NvdW50aWVzICU+JQ0KICBsZWZ0X2pvaW4oYmlydGhzX3llYXJUb3RhbCwgYnkgPSAiTkFNRSIpDQpgYGANCg0KYGBge3J9DQoNClAyLjIgPC0gZ2dwbG90KGJpcnRoX21hcCkgKw0KICAgICAgICAgZ2VvbV9zZihhZXMoZmlsbCA9IGNvdW50KSwgDQogICAgICAgICBhbHBoYT0wLjksIGNvbD0id2hpdGUiKSArDQogICAgICAgICBzY2FsZV9maWxsX3ZpcmlkaXNfYyhuYW1lID0gIkJpcnRocyIsIHRyYW5zID0gImxvZzIiLCBvcHRpb24gPSAicGxhc21hIikgKw0KICAgICAgICAgbGFicyh0aXRsZSA9ICJCaXJ0aCBpbiBDYWxpZm9ybmlhIikgDQpQMi4yDQpgYGANCmBgYHtyfQ0KbGlicmFyeSgic3ZnbGl0ZSIpDQpnZ3NhdmUoIkJpcnRoIGluIENhbGlmb3JuaWEuanBnIiwgUDIuMikNCmBgYA0KDQoNCioqQ29tbWVudCoqDQoNClRoZXJlIGlzIGEgY291bnR5IGluIHllbGxvdyzCoHdoaWNoIGlzIExBLCB0aGF0IGhhcyB0aGUgaGlnaGVzdCBudW1iZXIgb2YgYmlydGhzLiBUaGVyZSBhcmUgYWxzbyBtb3JlIGluIG9yYW5nZSB0aGF0IGNvdWxkIGJlIGJpZyBjaXRpZXMgbGlrZSBTYW4gRnJhbmNpc2NvLg0KDQoNCiMjIHZpc3VhbGl6YXRpb24gb2YgYSBtb2RlbA0KDQpgYGB7cn0NCmxpYnJhcnkoYnJvb20pDQpgYGANCg0KYGBge3J9DQpiaXJ0aHNfeWVhclRvdGFsJT4lIA0KIHRvcF9uKGNvdW50LCBuID0gNSkNCmBgYA0KDQpgYGB7cn0NCmJpcnRoc195ZWFyTEEgPC0gY2FsaV9iaXJ0aCAlPiUgZ3JvdXBfYnkoeWVhciwgYWdlZ3JwKSAlPiUNCiAgICBmaWx0ZXIoYWdlZ3JwID09ICJPbGRlciBNb3RoZXJzICgzNSB5ZWFycyBvbGQgb3Igb2xkZXIpIiAmIHBhdGNudHkgPT0gIkxvcyBBbmdlbGVzIikNCg0KYmlydGhzX3llYXJMQQ0KYGBgDQoNCmBgYHtyfQ0KYmlydGhfbW9kZWwgPC0gbG0oY291bnQgfiB5ZWFyLCBkYXRhID0gYmlydGhzX3llYXJMQSkNCmBgYA0KDQpgYGB7cn0NClAyLjMgPC0gZ2dwbG90KGJpcnRoc195ZWFyMiwgYWVzKHggPSB5ZWFyLCB5ID0gY291bnQpKSArDQogICAgICAgICBnZW9tX3BvaW50KCkgKw0KICAgICAgICAgZ2VvbV9zbW9vdGgobWV0aG9kID0gImxtIiwgDQogICAgICAgICAgICAgIGZvcm11bGEgPSAieSB+IHgiKSArIA0KICAgICAgICAgdGhlbWVfbWluaW1hbCgpDQpQMi4zDQpgYGANCg0KKipDb21tZW50KioNCkFzIHdlJ3ZlIHNlZW4sIExBIGhhcyBvbmUgb2YgdGhlIGhpZ2hlc3QgYmlydGggcmF0ZXMgaW4gQ2FsaWZvcm5pYS4gV2UnbGwgbW9zdCBsaWtlbHkgbmVlZCB0byBjb21wYXJlIHRvIGEgc2ltaWxhci1zaXplZCBjaXR5LiBGb3IgdGhlIHRpbWUgYmVpbmcsIHdlJ3JlIHNlZWluZyBhIGxpbmVhciBtb2RlbCBhcm91bmQgdGhlIHllYXIgaW4gcXVlc3Rpb24uDQoNCmBgYHtyfQ0KYmlydGhfbW9kZWwyPC0gdGlkeShiaXJ0aF9tb2RlbCwgY29uZi5pbnQgPSBUUlVFKSU+JQ0KICBmaWx0ZXIodGVybSAhPSAiKEludGVyY2VwdCkiKQ0KYGBgDQoNCmBgYHtyfQ0KZ2xhbmNlKGJpcnRoX21vZGVsKQ0KYGBgDQoNCmBgYHtyfQ0KUDIuNCA8LSBnZ3Bsb3QoYmlydGhfbW9kZWwyLA0KICAgICAgICAgICAgYWVzKHggPSBlc3RpbWF0ZSwgDQogICAgICAgICAgICAgICAgeSA9IGZjdF9yZXYodGVybSkpKSArDQogICAgICAgICAgICBnZW9tX3BvaW50cmFuZ2UoYWVzKHhtaW4gPSBjb25mLmxvdywgeG1heCA9IGNvbmYuaGlnaCkpICsNCiAgICAgICAgICAgIGdlb21fdmxpbmUoeGludGVyY2VwdCA9IDAsICBjb2xvciA9ICJwdXJwbGUiKSArIA0KICB0aGVtZV9taW5pbWFsKCkNCg0KUDIuNA0KYGBgDQoNCg==